library(plyr)
library(estimatr)
library(emmeans)
library(tidyverse)
library(magrittr)
library(broom)

# Download the experiment data, then recode the two outcome items as factors
# with an explicit level order and as numeric level codes.

t1 <- readRDS(
  gzcon(
    url("https://github.com/thomasjwood/full_fact/raw/main/ff_mt.rds")
  )
)

t1 <- mutate(
  t1,
  # shorten the two "Tend to ..." responses before fixing the level order
  out_agree = factor(
    mapvalues(
      out_agree,
      from = c("Tend to disagree", "Tend to agree"),
      to = c("Disagree", "Agree")
    ),
    levels = c(
      "Strongly disagree",
      "Disagree",
      "Neither agree nor disagree",
      "Agree",
      "Strongly agree"
    )
  ),
  # levels are reversed so that "False" is the first level and "True" the last
  out_true = factor(
    out_true,
    levels = rev(
      c("True", "Probably true", "Not sure", "Probably false", "False")
    )
  ),
  # numeric versions are the factor level codes
  agree_num = as.numeric(out_agree),
  true_num = as.numeric(out_true),
  cond = factor(
    cond,
    levels = c("cond_items", "cond_misinfo", "cond_corr")
  )
)

# combined outcome: mean of the two numeric items
t1$com_num <- (t1$agree_num + t1$true_num) / 2

# Wave-1 rows only, nested so that each country x wave x issue combination is
# one row. Issues whose name contains "global" or "saltwater" are relabelled
# as "Multi-country" before grouping.
# NOTE(review): ifelse() returns integer codes if `country` is a factor;
# this assumes it is stored as character -- confirm against the data.

t2 <- nest(
  group_by(
    filter(
      mutate(
        t1,
        country = ifelse(
          str_detect(issue, "global|saltwater"),
          "Multi-country",
          country
        )
      ),
      wave == "wave 1"
    ),
    country, wave, issue
  )
)

# One robust linear model per nested data set: combined outcome on condition.
t2$mods <- map(
  t2$data,
  function(d) lm_robust(com_num ~ cond, data = d)
)

# Estimated marginal means by condition plus consecutive contrasts; the data
# used for each fit is passed back to emmeans() explicitly.
t2$emm <- map2(
  t2$mods,
  t2$data,
  function(mod, d) emmeans(mod, consec ~ cond, data = d)
)

# First element of each emmeans result, tidied to a data frame.
t2$edf <- map(
  t2$emm,
  function(e) tidy(e[[1]])
)

# Second element of each emmeans result, tidied to a data frame.
t2$cont <- map(
  t2$emm,
  function(e) tidy(e[[2]])
)

# first, fitted values
#
# t3 stacks the tidied means and the tidied contrasts of every issue into one
# table, tags each row with the panel it belongs to (`horiz`), and adds
# interval bounds and a rounded label.

t3 <- bind_rows(
  # per-condition means, tagged with their issue and country
  t2 %>%
    ungroup() %>%
    select(country, issue, edf) %>%
    pmap_dfr(
      function(country, issue, edf) {
        mutate(edf, issue = issue, country = country)
      }
    ) %>%
    mutate(horiz = "Group means"),
  # contrasts: a contrast whose name mentions "items" is labelled as the
  # misinformation effect, every other one as the correction effect
  t2 %>%
    ungroup() %>%
    select(country, issue, cont) %>%
    pmap_dfr(
      function(country, issue, cont) {
        mutate(cont, issue = issue, country = country)
      }
    ) %>%
    rename(p.value = "adj.p.value") %>%
    mutate(
      horiz = ifelse(
        str_detect(contrast, "items"),
        "Misinformation effect",
        "Correction effect"
      )
    ) %>%
    select(-term, -contrast, -null.value)
) %>%
  mutate(
    # estimate +/- 2.58 standard errors
    # (presumably a 99% normal-approximation interval -- confirm)
    lo = estimate - std.error * 2.58,
    hi = estimate + std.error * 2.58,
    lab = round(estimate, 2)
  )

# Rows that have a `cond` value keep the bare rounded estimate; rows without
# one get significance stars (from the p-value) appended.
t3$lab <- ifelse(
  !is.na(t3$cond),
  t3$lab,
  str_c(t3$lab, gtools::stars.pval(t3$p.value))
)

t3$lab <- str_trim(t3$lab)

# A label that is exactly "0" is replaced by the estimate at four decimals.
t3$lab <- ifelse(
  t3$lab == "0",
  prettyNum(round(t3$estimate, 4), scientific = FALSE),
  t3$lab
)

# remove leading zeroes

t3$lab <- ifelse(
  str_extract(t3$lab, "\\d") == "0",
  str_replace(t3$lab, "0\\.", "\\."),
  t3$lab
)

# add decimal to missing decimals

t3$lab <- ifelse(
  str_detect(t3$lab, "\\."),
  t3$lab,
  # put a "." right after the first digit
  str_replace(
    t3$lab,
    '\\d',
    str_c(str_extract(t3$lab, "\\d"), "\\.")
  )
)

# add trailing zero
#
# Labels that show a single digit after the decimal point (".5", "-.3*") are
# padded to two decimals (".50", "-.30*").
#
# The result must be assigned back to t3$lab: a bare `%>%` pipeline here only
# prints the padded vector and leaves the labels untouched.
#
# The padding is limited to labels whose only digit follows the decimal
# point. A label such as "1." (digit before the point) is left alone, because
# appending a zero to that digit would turn it into "10.".

t3$lab <- ifelse(
  str_count(t3$lab, "\\d") == 1 &
    str_detect(t3$lab, "\\.\\d"),
  str_replace(
    t3$lab,
    "\\d",
    # the single digit followed by a zero, e.g. "5" -> "50"
    str_c(str_extract(t3$lab, "\\d"), "0")
  ),
  t3$lab
)




# Order issues by their average estimate across the "Group means" rows,
# largest first.
t3$issue <- factor(
  t3$issue,
  levels = t3 %>%
    filter(str_detect(horiz, "Group means")) %>%
    group_by(issue) %>%
    summarize(mu = mean(estimate)) %>%
    arrange(desc(mu)) %>%
    pull(issue)
)


# Same ordering for countries, then the fifth entry is moved to the front.
# NOTE(review): c(5, 1:4) hard-codes exactly five countries -- confirm if the
# data ever changes.
t3$country <- factor(
  t3$country,
  levels = (
    t3 %>%
      filter(str_detect(horiz, "Group means")) %>%
      group_by(country) %>%
      summarize(mu = mean(estimate)) %>%
      arrange(desc(mu)) %>%
      pull(country)
  )[c(5, 1:4)]
)

# Panel order follows first appearance in the table.
t3$horiz <- fct_inorder(t3$horiz)


# Rows with a `cond` value are relabelled with the estimate rounded to one
# decimal (the assignment coerces the numbers into the existing label vector).
t3$lab[!is.na(t3$cond)] <- round(
  t3$estimate[!is.na(t3$cond)],
  1
)

# A label with a single digit that is directly followed by "." (e.g. "1.")
# gets a zero after the point ("1.0"); everything else is kept as is.
t3$lab <- case_when(
  str_detect(t3$lab, "\\d\\.") & str_count(t3$lab, "\\d") == 1 ~
    str_replace(
      t3$lab,
      "\\d\\.",
      str_c(str_extract(t3$lab, "\\d"), ".0")
    ),
  TRUE ~ t3$lab
)


# Rename the means panel.
t3$horiz <- mapvalues(
  t3$horiz,
  from = "Group means",
  to = "Conditional means"
)


# Capitalise the one lower-case country name.
t3$country <- mapvalues(
  t3$country,
  from = "nigeria",
  to = "Nigeria"
)

# Print the current issue levels to the console (input for the relabelling
# of issues further down).
dput(levels(t3$issue))


# Replace the issue keys with display labels. The lookup is written as
# key = label pairs and kept local so no extra object is left behind.
t3$issue <- local({
  issue_labels <- c(
    nigeria_youthue    = "Youth U/E 70%",
    nigeria_birthreg   = "70% births not registered",
    safrica_tobacco    = "49% men, 34% women, smoke tobacco",
    nigeria_chancellor = "Nigerian unis 15 female chancellors",
    nigeria_malaria    = "Malaria kills 300k annually",

    haigh_knife        = "Knife crime all-time high",
    pichetto_shanty    = "4k shanty towns in Buenos Aires",
    safrica_latrines   = "100s children drown in pit latrines",
    ashworth_nurses    = "200k nurses quit NHS since 2010",
    fernandez_debt     = "Debt increased 38% to 100% GDP",

    fernandez_killings = "3,262 murders last year",
    safrica_civserv    = "80% of govt spending is salaries",
    nigeria_crossriver = "Cross River lowest national crime",
    macri_debt         = "Debt is today 100% GDP",
    macri_arrests      = "85k arrested for drug trafficking",

    safrica_resbank    = "German owns 57% SA Reserve Bank",
    cele_crimestats    = "SA only country releases crime stats",
    johnson_educ       = "Johnson invests £14bn in education",
    global_cooling     = "Global cooling '16-18",
    vaccine_cancer     = "Vaccines might cause cancer",

    javid_homeless     = "Homeless halved since 2008",
    saltwater_covid    = "Saltwater kills COVID"
  )

  mapvalues(
    t3$issue,
    from = names(issue_labels),
    to = unname(issue_labels)
  )
})


# changing facet labels

# Multi-line strip labels for the two effect panels; the level order is set
# explicitly afterwards.
t3$horiz <- factor(
  mapvalues(
    t3$horiz,
    from = c("Conditional means", "Misinformation effect", "Correction effect"),
    to = c(
      "Conditional means",
      "Misinformation\neffect\n(Misinfo - Control)",
      "Correction\neffect\n(Correction - Misinfo)"
    )
  ),
  levels = c(
    "Conditional means",
    "Misinformation\neffect\n(Misinfo - Control)",
    "Correction\neffect\n(Correction - Misinfo)"
  )
)

# Break country names onto several lines: whitespace becomes a newline and a
# newline is inserted after each hyphen. The same rewrite is applied to the
# values and to the existing levels so the level order is kept.
t3$country <- factor(
  str_replace_all(
    str_replace_all(t3$country, "\\s", "\n"),
    "\\-", "\\-\n"
  ),
  levels = str_replace_all(
    str_replace_all(levels(t3$country), "\\s", "\n"),
    "\\-", "\\-\n"
  )
)

 # ---- Figure: conditional means and effects by issue ------------------------
#
# Facet rows are countries, facet columns are the three `horiz` panels.
# Rows of t3 with a `cond` value are drawn as dodged, colour-coded points;
# rows without one are drawn as a point with an interval and a label.
#
# The data subsets, the dodge and the annotation tables are built once here
# instead of being repeated inside every layer.

# rows with a condition / rows without a condition
fig_means <- t3 %>%
  filter(!is.na(cond))

fig_effects <- t3 %>%
  filter(is.na(cond))

# one dodge shared by every layer drawn from fig_means, so they line up
fig_dodge <- position_dodge(width = .85)

# in-plot legend for the three conditions, placed in the first panel of the
# first facet row
fig_legend <- tribble(
  ~y, ~x, ~lab, ~cond,
  1.0, 1.8, "Correction", "cond_corr",
  1.4, 2.3, "Control", "cond_items",
  1.9, 2.75, "Misinfo", "cond_misinfo"
) %>%
  mutate(
    horiz = factor(levels(t3$horiz)[[1]], levels(t3$horiz)),
    country = factor(levels(t3$country)[[1]], levels(t3$country))
  )

# direction arrows and their captions: fifth facet row, one row of each
# table in the second panel and one in the third
# NOTE(review): index 5 hard-codes five country levels -- confirm.
fig_arrows <- tribble(
  ~x, ~y, ~xend, ~yend,
  0, .475, .5, .475,
  0, .475, -.5, .475
) %>%
  mutate(
    country = factor(levels(t3$country)[5], levels(t3$country)),
    horiz = factor(levels(t3$horiz)[2:3], levels(t3$horiz))
  )

fig_arrow_text <- tribble(
  ~x, ~y, ~label,
  .25, .7, "Less accurate",
  -.25, .7, "More accurate"
) %>%
  mutate(
    country = factor(levels(t3$country)[5], levels(t3$country)),
    horiz = factor(levels(t3$horiz)[2:3], levels(t3$horiz))
  )

# NOTE(review): `size` on line geoms (geom_linerange, geom_segment) is
# deprecated in favour of `linewidth` from ggplot2 3.4.0. It is left
# unchanged because the installed ggplot2 version is not visible here.

ggplot() +
  # ---- per-condition rows ----
  geom_blank(
    aes(
      y = issue, color = cond,
      xmin = lo, xmax = hi
    ),
    data = fig_means,
    position = fig_dodge,
    show.legend = FALSE
  ) +
  # line joining the issues within each condition
  geom_path(
    aes(
      estimate,
      issue,
      color = cond,
      group = cond
    ),
    data = fig_means %>%
      ungroup() %>%
      arrange(country, horiz, cond, issue),
    position = fig_dodge,
    show.legend = FALSE
  ) +
  geom_linerange(
    aes(
      x = estimate,
      y = issue,
      color = cond,
      xmin = lo, xmax = hi
    ),
    data = fig_means,
    position = fig_dodge,
    show.legend = FALSE
  ) +
  geom_label(
    aes(
      x, y,
      label = lab, color = cond
    ),
    fontface = "italic",
    size = 2.25,
    data = fig_legend
  ) +
  # coloured ring, then a slightly smaller white disc on top, then the
  # label text inside it
  geom_point(
    aes(
      estimate,
      issue,
      color = cond
    ),
    data = fig_means,
    position = fig_dodge,
    size = 5.5,
    shape = 21
  ) +
  geom_point(
    aes(
      estimate,
      issue,
      group = cond
    ),
    data = fig_means,
    position = fig_dodge,
    color = "white",
    size = 5.25
  ) +
  geom_text(
    aes(
      estimate,
      issue,
      group = cond,
      label = lab,
      color = cond
    ),
    data = fig_means,
    position = fig_dodge,
    size = 2.15,
    show.legend = FALSE
  ) +
  # ---- rows without a condition ----
  # dotted zero line, drawn only in facets that contain such rows
  geom_vline(
    aes(xintercept = yint),
    data = fig_effects %>%
      mutate(yint = 0),
    linetype = "dotted"
  ) +
  geom_label(
    aes(
      estimate,
      issue,
      label = lab
    ),
    data = fig_effects,
    nudge_y = .3,
    label.size = 0,
    fill = "grey95",
    size = 2.4
  ) +
  geom_linerange(
    aes(
      y = issue, xmin = lo, xmax = hi
    ),
    data = fig_effects,
    size = .25
  ) +
  geom_point(
    aes(
      estimate,
      issue
    ),
    # shape = 21,
    # size = 8.5,
    fill = "white",
    data = fig_effects
  ) +
  # ---- direction annotations ----
  geom_segment(
    aes(
      x = x, y = y, xend = xend, yend = yend
    ),
    arrow = arrow(length = unit(0.2, "cm"), type = "closed"),
    data = fig_arrows,
    size = .25
  ) +
  geom_text(
    aes(
      x, y,
      label = label
    ),
    data = fig_arrow_text,
    size = 2.5,
    fontface = "bold.italic"
  ) +
  # ---- layout, scales, theme ----
  facet_grid(
    country ~ horiz,
    scales = "free",
    space = "free"
  ) +
  scale_color_manual(
    values = c(
      "#377eb8",
      "#4daf4a",
      "#984ea3"
    )
  ) +
  scale_x_continuous(
    breaks = seq(-1, 4.5, .5),
    # axis labels without the leading zero (".5" rather than "0.5")
    labels = seq(-1, 4.5, .5) %>%
      str_replace_all("0\\.", "\\.")
  ) +
  scale_y_discrete(
    expand = expansion(add = c(.65, .65))
  ) +
  labs(
    x = "",
    y = "",
    color = ""
  ) +
  theme_minimal() +
  theme(
    panel.background = element_rect(
      color = "grey95",
      fill = "grey95"
    ),
    strip.background = element_rect(
      color = "grey95",
      fill = "grey95"
    ),
    panel.grid = element_blank(),
    strip.text.x = element_text(
      size = 10.5,
      angle = 0
    ),
    strip.text.y = element_text(
      size = 10.5,
      angle = 0
    ),
    legend.position = "none"
  )
